Number of clusters for K-medoids

Kmedoids_clusterN(dt)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
  1. Neither the error nor the time series representors show clusters: there is one dense area and some sparse points.
  2. The forecast and accuracy representors show clear clusters.

The figure below visualizes the Euclidean distance matrix of the time series.

visualizeDistance(dt_orig, "ts", "euclidean")

visualizeDistance(dt_orig, "error", "dtw")

visualizeDistance(dt_orig, "forecast", "euclidean")

visualizeDistance(dt_orig, "accuracy", "euclidean")

# Group visualization

visualizeGroup(dt_orig, "accuracy", "euclidean", names = dt_names)

visualizeGroup(dt_orig, "forecast", "euclidean", names = dt_names)

Why are there no groups when clustering by time series and error?

summary(silhouette(pam(dt_orig$distance$ts$euclidean, diss=TRUE, k=2)))
## Silhouette of 304 units in 2 clusters from pam(x = dt_orig$distance$ts$euclidean, k = 2, diss = TRUE) :
##  Cluster sizes and average silhouette widths:
##        172        132 
## 0.08506153 0.02771729 
## Individual silhouette widths:
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.04200  0.01458  0.03447  0.06016  0.09480  0.23971
clusGap(t(representator.ts(dt_orig)), pam, K.max = 20)
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(representator.ts(dt_orig)), FUNcluster = pam, K.max = 20)
## B=100 simulated reference sets, k = 1..20; spaceH0="scaledPCA"
##  --> Number of clusters (method 'firstSEmax', SE.factor=1): 3
##           logW   E.logW       gap      SE.sim
##  [1,] 7.333995 7.888536 0.5545410 0.003083401
##  [2,] 7.301210 7.877105 0.5758943 0.004615531
##  [3,] 7.284371 7.868750 0.5843791 0.004118626
##  [4,] 7.276279 7.861660 0.5853812 0.004007524
##  [5,] 7.270019 7.855084 0.5850655 0.003648205
##  [6,] 7.261755 7.849237 0.5874822 0.003468951
##  [7,] 7.254570 7.843358 0.5887877 0.003597396
##  [8,] 7.249161 7.837910 0.5887487 0.003436964
##  [9,] 7.242896 7.832425 0.5895290 0.003279700
## [10,] 7.236661 7.827282 0.5906213 0.003254337
## [11,] 7.231898 7.821979 0.5900812 0.003360779
## [12,] 7.224934 7.816943 0.5920087 0.003314935
## [13,] 7.218146 7.812012 0.5938663 0.003401122
## [14,] 7.212566 7.807062 0.5944960 0.003441033
## [15,] 7.207370 7.802136 0.5947660 0.003435307
## [16,] 7.202917 7.797240 0.5943230 0.003360371
## [17,] 7.195817 7.792535 0.5967181 0.003429480
## [18,] 7.190615 7.787596 0.5969809 0.003380901
## [19,] 7.183623 7.782825 0.5992028 0.003346182
## [20,] 7.176798 7.778073 0.6012746 0.003349642
summary(silhouette(pam(dt_orig$distance$error$euclidean, diss=TRUE, k=2)))
## Silhouette of 304 units in 2 clusters from pam(x = dt_orig$distance$error$euclidean, k = 2, diss = TRUE) :
##  Cluster sizes and average silhouette widths:
##         167         137 
## 0.006938547 0.004954501 
## Individual silhouette widths:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.010820  0.001458  0.004883  0.006044  0.010209  0.058709

Overall statistics

avg_measure_fn(dt, metric = "rmsse") %>% arrange(total)

Overall rank MCB test

rank_compare(dt)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(measure)
## 
##   # Now:
##   data %>% select(all_of(measure))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo